# coding=utf-8
from locale import *
import sys
import datetime
from selenium import webdriver
from selenium.webdriver.firefox.firefox_profile import FirefoxProfile
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup


def get_mac_address():
    """Return the node id reported by ``uuid.getnode()`` as 12 lowercase hex digits.

    Returns:
        A 12-character hexadecimal string with no separators.
    """
    from uuid import getnode

    # Keep only the low 48 bits and left-pad with zeros to 12 hex digits.
    # %-formatting is used on purpose: ``from locale import *`` at the top of
    # this file may shadow the builtins ``format`` and ``str``.
    return "%012x" % (getnode() & 0xFFFFFFFFFFFF)


def validate_mac_address():
    """Download the text served by the remote ``getallmacaddress`` endpoint.

    Despite its name, this function performs no validation itself; the caller
    checks whether its own MAC address occurs in the returned text.

    Returns:
        The response body decoded with the ``bytes.decode()`` default (UTF-8).

    Raises:
        urllib.error.URLError: If the request fails (propagated from
            ``urllib.request.urlopen``).
    """
    import urllib.request

    # The context manager guarantees the HTTP response (and its socket) is
    # closed even if reading or decoding raises.
    with urllib.request.urlopen('http://amazon-ceping.xunhuanle.com/publicwelcome/getallmacaddress') as response:
        return response.read().decode()


# Access gate: the script only continues when this machine's MAC address
# appears in the text returned by validate_mac_address().
mac_address = get_mac_address()
print("Your macaddress is below:", mac_address, sep="\n")

validation_content = validate_mac_address()
is_registered = mac_address in validation_content
if not is_registered:
    rejection_notice = "Please submit your unicode '" + mac_address + "' to administrator!!!"
    print(rejection_notice)
    # No argument is passed, so the process exits with the default status.
    sys.exit()


# Output file named after the start time of this run. The rows appended to it
# further down are tab-separated text.
file_name = "found_link_"+datetime.datetime.now().strftime('%Y-%m-%d_%H_%M_%S')+".xls"

search_url = input("Input Search URL:")
print("Search url is:", search_url)

# Keyword that must occur in a result's text for it to be recorded. An empty
# answer matches every result, because '' is a substring of any string.
keyWords = input("Input Unavailable Keywords:")
keyWords = keyWords.strip()
print("Unavailable Keywords is:", keyWords)

# atof() is used below to parse numbers, so LC_NUMERIC must be an English
# locale. 'English_US' is the Windows spelling; the other candidates are the
# usual POSIX names, tried so the script also starts on Linux/macOS.
for _locale_name in ('English_US', 'en_US.UTF-8', 'en_US'):
    try:
        setlocale(LC_NUMERIC, _locale_name)
    except Error:  # locale.Error, in scope through ``from locale import *``
        continue
    break
else:
    # None of the candidates is installed: repeat the original call so the
    # usual locale.Error is raised with its normal message.
    setlocale(LC_NUMERIC, 'English_US')

# Optional: start Firefox with image loading disabled (and the Flash plugin
# off) by passing a customised profile instead of the default one:
#
#     firefoxProfile = FirefoxProfile()
#     firefoxProfile.set_preference('permissions.default.image', 2)
#     firefoxProfile.set_preference('dom.ipc.plugins.enabled.libflashplayer.so', 'false')
#     driver = webdriver.Firefox(firefoxProfile)
driver = webdriver.Firefox()
driver.get(search_url)

# Local import, like the function-scope imports above: TimeoutException is
# what WebDriverWait.until() raises when the awaited element never appears.
from selenium.common.exceptions import TimeoutException


def _build_result_row(item, keyword):
    """Return the tab-separated record for one search result, or None to skip it.

    Args:
        item: A BeautifulSoup tag for one ``li.s-result-item`` element.
        keyword: Text that must occur in the item's text. An empty string
            matches every item.

    Returns:
        ``url<TAB>name<TAB>stars<TAB>reviews`` (no trailing newline), or None
        when the item has no ``span[name]``, does not contain ``keyword``, or
        has no ``a`` element directly following that span.

    Raises:
        ValueError: If the text of the following ``a`` element cannot be
            parsed as a number by ``atof`` under the current locale.
    """
    span_name = item.select_one('span[name]')
    if span_name is None:
        return None

    # Check whether the product status text (e.g. "unavailable") is present.
    if keyword not in item.get_text():
        return None

    # Value of the span's ``name`` attribute (presumably the product ASIN).
    asin_text = span_name.get("name")

    star_ele = span_name.select_one('i.a-icon-star > span')
    star_text = star_ele.get_text() if star_ele is not None else "Null"

    # NOTE(review): this selector starts with a combinator; newer,
    # soupsieve-backed BeautifulSoup releases may reject it. Confirm against
    # the installed bs4 version.
    review_ele = span_name.select_one(' + a')
    if review_ele is None:
        return None
    review_text = review_ele.get_text()
    # Kept as a validation step: the parsed number itself is not used, but a
    # non-numeric value must still raise exactly as it did before.
    int(atof(review_text))

    # Get the URL from the element wrapping the <h2> title. Fall back to the
    # same "Null" placeholder used for missing stars rather than crashing.
    title_ele = item.select_one('h2')
    m_url = title_ele.parent.get("href") if title_ele is not None else None
    if m_url is None:
        m_url = "Null"

    return m_url + '\t' + asin_text + '\t' + star_text + '\t' + review_text


try:
    while True:
        soup = BeautifulSoup(driver.page_source, "html.parser")
        # Open the output once per page; leaving the ``with`` block flushes
        # and closes it, so rows survive a failure on a later page.
        with open(file_name, 'a') as out_file:
            for item in soup.select('li.s-result-item'):
                row = _build_result_row(item, keyWords)
                if row is None:
                    continue
                print(row + " write to file")
                out_file.write(row + '\n')

        try:
            # until() returns the located element, so no second lookup is needed.
            next_link = WebDriverWait(driver, 60).until(
                EC.presence_of_element_located((By.ID, 'pagnNextLink')))
        except TimeoutException:
            # No "next page" link appeared within 60 s: treat as the last page.
            break

        nextPageUrl = next_link.get_attribute("href")
        if not nextPageUrl:
            # A next-page element without a target also ends pagination.
            break
        print(nextPageUrl)
        driver.get(nextPageUrl)
finally:
    # Always shut the browser down, including when an error aborts the loop.
    driver.quit()


